# Test case 2

import json
import os

from pyspark import SparkConf, SparkContext

# Point PySpark worker processes at a local Python interpreter
# (adjust this path to match your own installation).
os.environ["PYSPARK_PYTHON"] = "D:/Soft/Python/Python310/python.exe"

if __name__ == '__main__':
    # Run Spark locally, using all available CPU cores
    conf = SparkConf().setMaster("local[*]").setAppName("test")
    sc = SparkContext(conf=conf)

    # Each line of the input file holds one or more JSON order records
    # separated by "|"; split them apart and parse each record into a dict.
    rdd = sc.textFile("data/orders.txt")
    rdd_order = rdd.flatMap(lambda ele: ele.split("|")).map(json.loads)
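
    # Hypothetical sample line of data/orders.txt, inferred from the parsing
    # above (the field values shown are assumptions, not the real dataset):
    # {"areaName": "北京", "money": "99", "category": "书籍"}|{"areaName": "上海", "money": "45", "category": "食品"}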

    # Total sales per city, sorted from largest to smallest
    # (numPartitions=1 keeps the sorted result in a single partition)
    result1 = (rdd_order.map(lambda ele: (ele["areaName"], int(ele["money"])))
               .reduceByKey(lambda a, b: a + b)
               .sortBy(lambda ele: ele[1], ascending=False, numPartitions=1)
               .collect())
    print(result1)

    # All distinct product categories on sale across every city
    result2 = rdd_order.map(lambda ele: ele["category"]).distinct().collect()
    print(result2)

    # Distinct product categories on sale in Beijing (areaName == "北京")
    result3 = (rdd_order.filter(lambda ele: ele["areaName"] == "北京")
               .map(lambda ele: ele["category"])
               .distinct()
               .collect())
    print(result3)

    sc.stop()
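
    # To run locally: `python <this_script>.py` (script name assumed here),
    # provided pyspark is installed (pip install pyspark) and data/orders.txt exists.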
